20191002

kernel

  • Cat and Dog problem

  • A simple geometric solution

  • A more general solution

Dot product \(\vec a\vec b=a_xb_x+a_yb_y=|\vec a||\vec b|\cos(\theta)\)

Exercise:

\(g(x)=<C_+-C_-,X-C>=<C_+,X>-<C_-,X>-<C_+,C>+<C_-,C>\);

\(<C_+,X>=<\frac1{n_{+}}\sum\limits_{l\in I_+}^nx_i,x>\);

\(<C_-,X>=<\frac1{n_{-}}\sum\limits_{l\in I_-}^nx_i,x>\);

\(<C_+,C>=<C_+,\frac12C_+>+<C_+,\frac12C_->=\frac1{2n_{+}^2}\sum\limits_{(i,j)\in I_{+}}<x_i,x_j>+\frac12<C_+,C_->\)

\(<C_-,C>=<C_-,\frac12C_+>+<C_-,\frac12C_->=\frac12<C_+,C_->+\frac1{2n_{-}^2}\sum\limits_{(i,j)\in I_{-}}<x_i,x_j>\)

\(g(x)=\sum_{l=1}^n\alpha_i<x_i,x>+b\),

\(b=\frac12\left[\frac1{n_{-}^2}\sum\limits_{(i,j)\in I_{-}}<x_i,x_j>-\frac1{n_{+}^2}\sum\limits_{(i,j)\in I_{+}}<x_i,x_j>\right]\)

\(\alpha_i=\begin{cases}\frac1{n_{+}}&y_i=+1\\-\frac1{n_{-}}&y_i=-1\end{cases}\)

  • Import the iris data
rm(list=ls())
library(datasets)
data(iris)
iris <- iris[1:100,]
iris$class <- NA
library(ggplot2)
GGally::ggpairs(iris,mapping=aes(color =Species,shape=Species,alpha=0.3),
        columns=c("Sepal.Length","Sepal.Width","Petal.Length","Petal.Width","Species"))+theme_light()

  • Define the train and test sets
iris_setosa <- iris[iris$Species=="setosa",]
iris_versicolor <- iris[iris$Species=="versicolor",]

iris_test<- rbind(iris_setosa[41:50,],iris_versicolor[41:50,])

iris_train<- rbind(iris_setosa[1:40,],iris_versicolor[1:40,])

iris_train_setosa<- iris_setosa[1:40,]

iris_train_versicolor<- iris_versicolor[1:40,]
  • Define a kernel function and classifier
k <- function(x,y){return(x %*% t(y))}
classifier <- function(test,train_po,train_ne){
  
  for(i in 1:nrow(train_po)){
    a_po = 1/nrow(train_po)*sum(k(as.matrix(test),train_po[i,]))
  }
  for(i in 1:nrow(train_ne)){
    a_ne = 1/nrow(train_ne)*sum(k(as.matrix(test),train_ne[i,]))
  }
  
  for(i in 1:(nrow(train_po)-1)){
    for (j in (i+1):nrow(train_po)){
      b_po = -0.5*(1/nrow(train_po)^2)*sum(k(as.matrix(train_po[i,]),train_po[j,]))
    }
  }
  for(i in 1:(nrow(train_ne)-1)){
    for (j in (i+1):nrow(train_ne)){
      b_ne = 0.5*(1/nrow(train_ne)^2)*sum(k(as.matrix(train_ne[i,]),train_ne[j,]))
    }
  }
  
  class = a_po+a_ne+b_po+b_ne
  return(class)
}

for (i in 1:20){
iris_test[i,6] <- classifier(iris_test[i,1:4],iris_train_setosa[,1:4],iris_train_versicolor[,1:4])
}
  • define a 3D grid
kd <- with(iris_test, MASS::kde2d(iris_train$Petal.Length,iris_train$Petal.Width, n = 20))
kd <- MASS::kde2d(iris_train$Petal.Length,iris_train$Petal.Width, n = 20)
  • Plot the test and 3D grid
library(plotly)
plot_ly (type='surface',x=kd$x,y=kd$y,z =kd$z, opacity=0.95)%>%add_markers( x = ~iris_test$Petal.Length, y = ~iris_test$Petal.Width, z = ~iris_test$class-2, color = ~iris_test$Species,size =10, symbol=iris_test$Species, symbols = c('circle-open','cross'))
  • Another kernel function and another way of computing the classifier
k = function(x,y)  return(sum(x*y)+1)

k.pp=outer(1:40,1:40,Vectorize(function(i,j) k(iris_train_setosa[i,1:4],iris_train_setosa[j,1:4])))
k.mm=outer(1:40,1:40,Vectorize(function(i,j) k(iris_train_versicolor[i,1:4],iris_train_versicolor[j,1:4])))
b=(sum(k.mm)/(40^2)-sum(k.pp)/(40^2))/2
alpha=ifelse(iris_train$Species=="setosa",1/40,-1/40)

k.x=outer(1:80,1:20,Vectorize(function(i,j) k(iris_train[i,1:4],iris_test[j,1:4])))
iris_test[,6]=(t(k.x)%*%alpha+b)

z.zero=matrix(0,nrow=20,ncol=20)
  • Define a grid
g.n=20
x.min=min(iris_test[,3:4])
x.max=max(iris_test[,3:4])
z.hat=matrix(NA,nrow=g.n,ncol=g.n)
g=seq(from=x.min,to=x.max,length.out=g.n)
for (i in (1:g.n)){
  for (j in (1:g.n)){
    u=c(0,0,g[i],g[j])
    k.x=outer(1:80,1,Vectorize(function(i,j) k(as.matrix(iris_train[i,1:4]),u)))
    z.hat[i,j]=sum(k.x*alpha)+b
  }
}
  • Plot the test and grid
plot_ly (type = 'surface' , x=g,y=g,z=(z.hat), opacity = 0.9 ,showscale =T)%>%
  add_markers( x = iris_test$Petal.Length, y = iris_test$Petal.Width, z = iris_test$class, 
               color =iris_test$Species, size=1,
               symbol=iris_test$Species, symbols = c('circle-open','cross'),
               opacity = 1)%>%
  layout(scene = list(xaxis = list(title = 'Petal.Length'),
                     yaxis = list(title = 'Petal.Width'),
                     zaxis = list(title = 'Class')))
  • Evaluate the classifier
iris_test$evaluate=ifelse(iris_test$class>0,"setosa","versicolor")
length(which(iris_test$Spicies==iris_test$evaluate))/20
## [1] 0

Error rate = 0%

# # #

f